# coding=utf-8
'''
@LastEditors  : FlyingHu
@Date         : 2020-05-22 07:32:16
@LastEditTime : 2020-05-22 08:00:10
@Description  : 文章工具包
'''
from typing import Dict
import re
from collections import Counter


def cut_article(article: str) -> Dict[str, int]:
    '''
    @Description: Split an article into words and count how often each occurs.
        A word is a maximal run of regex "word" characters (letters, digits
        and underscore; Unicode-aware for str input). Everything else
        (punctuation, whitespace) only acts as a separator, so "It's" is
        counted as the two words "It" and "s". Matching is case-sensitive.
    @Args: article full text of the article
    @Returns: dict mapping each word to its number of occurrences, in order
        of first appearance; empty dict when the text contains no words
    '''
    # Extract the word runs directly instead of replacing separators with a
    # space and splitting on " ": that approach produced empty-string tokens
    # whenever the text began or ended with a separator (e.g. a trailing
    # period), which then showed up as a bogus "" entry in the result.
    words = re.findall(r"\w+", article)
    return dict(Counter(words))
    



if __name__ == "__main__":
    # Manual smoke run: feed a short sample story through cut_article.
    # The returned word-count dict is not used here.
    sample_story = "Hooray! It's snowing! It's time to make a snowman.James runs out. He makes a big pile of snow. He puts a big snowball on top. He adds a scarf and a hat. He adds an orange for the nose. He adds coal for the eyes and buttons.In the evening, James opens the door. What does he see? The snowman is moving! James invites him in. The snowman has never been inside a house. He says hello to the cat. He plays with paper towels.A moment later, the snowman takes James's hand and goes out.They go up, up, up into the air! They are flying! What a wonderful night!The next morning, James jumps out of bed. He runs to the door.He wants to thank the snowman. But he's gone."
    cut_article(sample_story)
